<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package	Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license	http://framework.zend.com/license/new-bsd	 New BSD License
 */



/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';


/**
 * @category   Zend
 * @package	Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license	http://framework.zend.com/license/new-bsd	 New BSD License
 */
/**
 * Abstract byte-oriented storage used to read and write Lucene index
 * primitives (bytes, fixed-width big-endian integers, variable-length
 * integers, strings and binary blobs).
 *
 * Concrete subclasses supply the raw I/O through _fread(), _fwrite(),
 * seek(), tell(), flush(), lock() and unlock(); every public read and write
 * helper below is built only on top of _fread() and _fwrite().
 */
abstract class Zend_Search_Lucene_Storage_File
{
	/**
	 * Reads $length number of bytes at the current position in the
	 * file and advances the file pointer.
	 *
	 * @param integer $length
	 * @return string
	 */
	abstract protected function _fread($length=1);


	/**
	 * Sets the file position indicator and advances the file pointer.
	 * The new position, measured in bytes from the beginning of the file,
	 * is obtained by adding offset to the position specified by whence,
	 * whose values are defined as follows:
	 * SEEK_SET - Set position equal to offset bytes.
	 * SEEK_CUR - Set position to current location plus offset.
	 * SEEK_END - Set position to end-of-file plus offset. (To move to
	 * a position before the end-of-file, you need to pass a negative value
	 * in offset.)
	 * Upon success, returns 0; otherwise, returns -1
	 *
	 * @param integer $offset
	 * @param integer $whence
	 * @return integer
	 */
	abstract public function seek($offset, $whence=SEEK_SET);

	/**
	 * Get file position.
	 *
	 * @return integer
	 */
	abstract public function tell();

	/**
	 * Flush output.
	 *
	 * Returns true on success or false on failure.
	 *
	 * @return boolean
	 */
	abstract public function flush();

	/**
	 * Writes $length number of bytes (all, if $length===null) to the end
	 * of the file.
	 *
	 * @param string $data
	 * @param integer $length
	 */
	abstract protected function _fwrite($data, $length=null);

	/**
	 * Lock file
	 *
	 * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
	 *
	 * @param integer $lockType
	 * @param boolean $nonBlockinLock  presumably requests a non-blocking lock
	 *                                 attempt; the exact semantics are defined
	 *                                 by the implementing class
	 * @return boolean
	 */
	abstract public function lock($lockType, $nonBlockinLock = false);

	/**
	 * Unlock file
	 */
	abstract public function unlock();

	/**
	 * Reads a byte from the current position in the file
	 * and advances the file pointer.
	 *
	 * @return integer  the byte value (0..255)
	 */
	public function readByte()
	{
		return ord($this->_fread(1));
	}

	/**
	 * Writes a byte to the end of the file.
	 *
	 * @param integer $byte
	 * @return mixed  whatever the underlying _fwrite() implementation returns
	 */
	public function writeByte($byte)
	{
		return $this->_fwrite(chr($byte), 1);
	}

	/**
	 * Read num bytes from the current position in the file
	 * and advances the file pointer.
	 *
	 * @param integer $num
	 * @return string
	 */
	public function readBytes($num)
	{
		return $this->_fread($num);
	}

	/**
	 * Writes num bytes of data (all, if $num===null) to the end
	 * of the file.
	 *
	 * @param string $data
	 * @param integer $num
	 */
	public function writeBytes($data, $num=null)
	{
		$this->_fwrite($data, $num);
	}


	/**
	 * Reads a 4-byte big-endian integer from the current position in the
	 * file and advances the file pointer.
	 *
	 * No sign extension is performed: the four bytes are simply shifted and
	 * OR-ed together, so with 64-bit PHP integers the result is always in
	 * the range 0..0xFFFFFFFF.
	 *
	 * @return integer
	 */
	public function readInt()
	{
		$str = $this->_fread(4);

		return  ord($str[0]) << 24 |
				ord($str[1]) << 16 |
				ord($str[2]) << 8  |
				ord($str[3]);
	}


	/**
	 * Writes an integer to the end of file as 4 bytes, most significant
	 * byte first.
	 *
	 * @param integer $value
	 */
	public function writeInt($value)
	{
		settype($value, 'integer');
		$this->_fwrite( chr($value>>24 & 0xFF) .
						chr($value>>16 & 0xFF) .
						chr($value>>8  & 0xFF) .
						chr($value     & 0xFF),   4  );
	}


	/**
	 * Returns a long integer (8 bytes, big-endian) from the current position
	 * in the file and advances the file pointer.
	 *
	 * @return integer
	 * @throws Zend_Search_Lucene_Exception  in 32-bit mode, when the stored
	 *         value does not fit into a positive 32-bit integer
	 */
	public function readLong()
	{
		$str = $this->_fread(8);

		/**
		 * Check, that we work in 64-bit mode.
		 * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
		 */
		if (PHP_INT_SIZE > 4) {
			return  ord($str[0]) << 56  |
					ord($str[1]) << 48  |
					ord($str[2]) << 40  |
					ord($str[3]) << 32  |
					ord($str[4]) << 24  |
					ord($str[5]) << 16  |
					ord($str[6]) << 8   |
					ord($str[7]);
		}

		// 32-bit mode: the upper four bytes must all be zero and the top bit
		// of the lower four bytes (byte 4) must be clear, otherwise the value
		// exceeds 0x7FFFFFFF and would overflow a 32-bit integer.
		if ((ord($str[0])         != 0) ||
			(ord($str[1])         != 0) ||
			(ord($str[2])         != 0) ||
			(ord($str[3])         != 0) ||
			((ord($str[4]) & 0x80) != 0)) {
			throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
		}

		return  ord($str[4]) << 24  |
				ord($str[5]) << 16  |
				ord($str[6]) << 8   |
				ord($str[7]);
	}

	/**
	 * Writes long integer to the end of file as 8 bytes, most significant
	 * byte first.
	 *
	 * @param integer $value
	 * @throws Zend_Search_Lucene_Exception  in 32-bit mode, when $value is
	 *         greater than 0x7FFFFFFF
	 */
	public function writeLong($value)
	{
		/**
		 * Check, that we work in 64-bit mode.
		 * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
		 */
		if (PHP_INT_SIZE > 4) {
			settype($value, 'integer');
			$this->_fwrite( chr($value>>56 & 0xFF) .
							chr($value>>48 & 0xFF) .
							chr($value>>40 & 0xFF) .
							chr($value>>32 & 0xFF) .
							chr($value>>24 & 0xFF) .
							chr($value>>16 & 0xFF) .
							chr($value>>8  & 0xFF) .
							chr($value     & 0xFF),   8  );
		} else {
			if ($value > 0x7FFFFFFF) {
				throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
			}

			// The upper four bytes are always zero in 32-bit mode.
			$this->_fwrite( "\x00\x00\x00\x00"     .
							chr($value>>24 & 0xFF) .
							chr($value>>16 & 0xFF) .
							chr($value>>8  & 0xFF) .
							chr($value     & 0xFF),   8  );
		}
	}



	/**
	 * Returns a variable-length integer from the current
	 * position in the file and advances the file pointer.
	 *
	 * Each byte carries 7 payload bits, least significant group first;
	 * the high bit (0x80) of a byte signals that another byte follows.
	 *
	 * @return integer
	 */
	public function readVInt()
	{
		$nextByte = ord($this->_fread(1));
		$val = $nextByte & 0x7F;

		for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
			$nextByte = ord($this->_fread(1));
			$val |= ($nextByte & 0x7F) << $shift;
		}
		return $val;
	}

	/**
	 * Writes a variable-length integer to the end of file.
	 *
	 * Emits 7 bits per byte, least significant group first, setting the
	 * high bit on every byte except the last one.
	 *
	 * @param integer $value
	 */
	public function writeVInt($value)
	{
		settype($value, 'integer');
		while ($value > 0x7F) {
			$this->_fwrite(chr( ($value & 0x7F)|0x80 ));
			$value >>= 7;
		}
		$this->_fwrite(chr($value));
	}


	/**
	 * Reads a string from the current position in the file
	 * and advances the file pointer.
	 *
	 * The stored form is a VInt character count followed by the characters
	 * in Java 2 "modified UTF-8". The two-byte sequence \xC0\x80 is decoded
	 * to a single \x00 byte; everything else is returned unchanged.
	 *
	 * @return string
	 */
	public function readString()
	{
		$strlen = $this->readVInt();
		if ($strlen == 0) {
			return '';
		}

		/**
		 * This implementation supports only Basic Multilingual Plane
		 * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
		 * "supplementary characters" (characters whose code points are
		 * greater than 0xFFFF)
		 * Java 2 represents these characters as a pair of char (16-bit)
		 * values, the first from the high-surrogates range (0xD800-0xDBFF),
		 * the second from the low-surrogates range (0xDC00-0xDFFF). Then
		 * they are encoded as usual UTF-8 characters in six bytes.
		 * Standard UTF-8 representation uses four bytes for supplementary
		 * characters.
		 */

		// $strlen starts as the number of characters, so this first read
		// fetches one byte per character. Every multi-byte lead byte found
		// below pulls its continuation bytes from the file and grows $strlen,
		// which from then on tracks the byte length of $str_val.
		$str_val = $this->_fread($strlen);

		for ($count = 0; $count < $strlen; $count++ ) {
			$leadByte = ord($str_val[$count]);

			if (($leadByte & 0xC0) == 0xC0) {
				$addBytes = 1;
				if ($leadByte & 0x20) {
					$addBytes++;

					// Never used. Java2 doesn't encode strings in four bytes
					if ($leadByte & 0x10) {
						$addBytes++;
					}
				}
				$str_val .= $this->_fread($addBytes);
				$strlen += $addBytes;

				// Check for null character. Java2 encodes null character
				// in two bytes.
				if ($leadByte == 0xC0 &&
					ord($str_val[$count+1]) == 0x80) {
					// Collapse \xC0\x80 into one real NUL byte. The buffer
					// becomes one byte shorter, so $strlen shrinks with it
					// and $count stays on the NUL: the loop increment then
					// moves to the next character instead of skipping it.
					$str_val = substr($str_val, 0, $count)
							 . "\x00"
							 . substr($str_val, $count+2);
					$strlen--;
				} else {
					// Skip the continuation bytes of this character.
					$count += $addBytes;
				}
			}
		}

		return $str_val;
	}

	/**
	 * Writes a string to the end of file.
	 *
	 * The string is written as a VInt character count followed by its bytes.
	 * The input is expected to be UTF-8 already; the only transformation is
	 * that \x00 bytes are written as \xC0\x80 (Java 2 "modified UTF-8").
	 *
	 * @param string $str
	 * @throws Zend_Search_Lucene_Exception  if the string ends in the middle
	 *         of a multi-byte sequence
	 */
	public function writeString($str)
	{
		/**
		 * This implementation supports only Basic Multilingual Plane
		 * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
		 * "supplementary characters" (characters whose code points are
		 * greater than 0xFFFF)
		 * Java 2 represents these characters as a pair of char (16-bit)
		 * values, the first from the high-surrogates range (0xD800-0xDBFF),
		 * the second from the low-surrogates range (0xDC00-0xDFFF). Then
		 * they are encoded as usual UTF-8 characters in six bytes.
		 * Standard UTF-8 representation uses four bytes for supplementary
		 * characters.
		 */

		// convert input to a string before iterating string characters
		settype($str, 'string');

		$chars = $strlen = strlen($str);
		$containNullChars = false;

		for ($count = 0; $count < $strlen; $count++ ) {
			/**
			 * String is already in Java 2 representation.
			 * We should only calculate actual string length and replace
			 * \x00 by \xC0\x80
			 */
			$byte = ord($str[$count]);

			if (($byte & 0xC0) == 0xC0) {
				$addBytes = 1;
				if ($byte & 0x20) {
					$addBytes++;

					// Never used. Java2 doesn't encode strings in four bytes
					// and we don't support non-BMP characters
					if ($byte & 0x10) {
						$addBytes++;
					}
				}

				// The continuation bytes must lie inside the string. If they
				// do not, the character count written below would not match
				// the bytes that follow it and the stream could not be read
				// back.
				if ($count + $addBytes >= $strlen) {
					throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
				}

				$chars -= $addBytes;
				$count += $addBytes;
			} elseif ($byte == 0) {
				// A NUL byte is a single-byte character, so it can only be
				// detected outside the multi-byte branch.
				$containNullChars = true;
			}
		}

		$this->writeVInt($chars);
		if ($containNullChars) {
			$this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
		} else {
			$this->_fwrite($str);
		}
	}


	/**
	 * Reads binary data from the current position in the file
	 * and advances the file pointer.
	 *
	 * The data is stored as a VInt byte count followed by that many bytes.
	 *
	 * @return string
	 */
	public function readBinary()
	{
		$length = $this->readVInt();

		// Same zero-length shortcut as readString(): never ask the
		// underlying storage for a zero-byte read.
		if ($length == 0) {
			return '';
		}

		return $this->_fread($length);
	}
}
